R Markdown

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.6     ✓ stringr 1.4.0
## ✓ tidyr   1.2.0     ✓ forcats 0.5.1
## ✓ readr   2.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(nhdplusTools)
## USGS Support Package: https://owi.usgs.gov/R/packages.html#support
library(mapview)
# Comparing PRMS NLCD proportions dataset wih FORESCE
targets::tar_make(p2_PRMS_NLCD_lc_proportions_reclass_cat)
## ✓ skip target p2_prms_nhdv2_xwalk
## ✓ skip target p1_nhdv2reaches_sf
## ✓ skip target p2_drb_comids_all_tribs
## ✓ skip target p1_NLCD_LC_data
## ✓ skip target p2_NLCD_LC_w_catchment_area
## ✓ skip target p2_PRMS_NLCD_lc_proportions_cat
## ✓ skip target p2_PRMS_NLCD_lc_proportions_reclass_cat
## ✓ skip pipeline
targets::tar_make(p2_FORESCE_LC_per_catchment_reclass_cat)
## ✓ skip target p1_FORESCE_backcasted_LC
## ✓ skip target p1_catchments_edited_gpkg
## ✓ skip target p1_catchments_edited_sf
## ✓ skip target p2_FORESCE_LC_per_catchment
## ✓ skip target p2_FORESCE_LC_per_catchment_reclass_cat
## ✓ skip pipeline
targets::tar_load(p2_PRMS_NLCD_lc_proportions_reclass_cat)
targets::tar_load(p2_FORESCE_LC_per_catchment_reclass_cat)

targets::tar_make(p1_catchments_edited_sf)
## ✓ skip target p1_catchments_edited_gpkg
## ✓ skip target p1_catchments_edited_sf
## ✓ skip pipeline
targets::tar_load(p1_catchments_edited_sf)
targets::tar_make(p1_reaches_sf)
## ✓ skip target p1_reaches_shp_zip
## ✓ skip target p1_reaches_shp
## ✓ skip target p1_reaches_sf
## ✓ skip pipeline
targets::tar_load(p1_reaches_sf)

###—-

# subset/filter and mutate. We choose year 2000 for FORESCE and 2001 for NLCD

##2000
l1 <- p2_FORESCE_LC_per_catchment_reclass_cat[[7]] %>%
  #  select(PRMS_segid, Year, everything(), -hru_segment) %>% 
  mutate(total_PRMS_area_km2 = total_PRMS_area/10^6) %>%
  select(PRMS_segid, hru_segment, total_PRMS_area_km2)
## 2001
l2 <- p2_PRMS_NLCD_lc_proportions_reclass_cat[[1]] %>%
  # select(PRMS_segid, Year, everything(), -AREASQKM_PRMS) %>% 
  select(PRMS_segid, AREASQKM_PRMS)

head(l1)
## # A tibble: 6 × 3
## # Groups:   PRMS_segid [6]
##   PRMS_segid hru_segment total_PRMS_area_km2
##   <chr>      <chr>                     <dbl>
## 1 1_1        1                         63.8 
## 2 10_1       10                         5.41
## 3 11_1       11                         6.02
## 4 111_1      111                       41.7 
## 5 112_1      112                      270.  
## 6 113_1      113                       21.0
head(l2)
## # A tibble: 6 × 2
##   PRMS_segid AREASQKM_PRMS
##   <chr>              <dbl>
## 1 1_1                65.0 
## 2 10_1                5.47
## 3 11_1                5.94
## 4 111_1              41.6 
## 5 112_1             271.  
## 6 113_1              21.1
# Merge l1 and l2 and calculate different for each PRMS SEGMENT
df <- l1 %>% left_join(l2, by = 'PRMS_segid') %>% 
  rename(PRMS_area_km2_FOR = total_PRMS_area_km2, PRMS_area_km2_NLCD = AREASQKM_PRMS) %>% 
  mutate(ab_diff = abs(PRMS_area_km2_FOR - PRMS_area_km2_NLCD)) %>%
  mutate(ab_diff_categorized = factor(case_when(ab_diff > 50 ~ '>50 km2',
                              ab_diff > 10 ~ '>10 km2',
                              ab_diff > 5 ~ '>5 km2',
                              ab_diff >1 ~ '>1 km2',
                              TRUE ~ '< 1 km2'),
                              levels = c('< 1 km2', '>1 km2', '>5 km2', '>10 km2', '>50 km2'))) 


dim(df)
## [1] 459   6
## Plotting
scatterplot1 <- ggplot(df, aes(y = PRMS_area_km2_FOR, x = PRMS_area_km2_NLCD))+
  geom_point(aes(color = ab_diff_categorized))+theme_bw()+
  ylab('PRMS Cat Area - geopackage')+
  xlab('PRMS Cat Area - nhd catchment aggregation')+
  labs(caption = 'Scatter plot of PRMS area in the NLCD dataset versus FORESCE.\n Point colored by their difference categories')+
  theme(plot.caption = element_text(hjust = 0.5))
scatterplot1  

bar_plot_diff_bins <- ggplot(df, aes(x = ab_diff_categorized))+
  geom_bar(fill = 'lightblue4')+
  scale_y_continuous(n.breaks = 15)+
  theme_bw()+
  labs(caption ='\n Ab difference between area (km2) of FORESCE land use prop dataset\n (2000) and NLCD land use dataset (2001) grouped into 4 categories')+
  theme(plot.caption = element_text(hjust = 0.5))
bar_plot_diff_bins

###—-

## Extract rows that are problematic
over_50_km2 <- df %>% filter(ab_diff_categorized == '>50 km2') %>% select(PRMS_segid)
over_10_km2 <- df %>% filter(ab_diff_categorized == '>10 km2') %>% select(PRMS_segid)

## Pulling target that maps all comids within each PRMS
targets::tar_make(p2_drb_comids_all_tribs)
## ✓ skip target p2_prms_nhdv2_xwalk
## ✓ skip target p2_drb_comids_all_tribs
## ✓ skip pipeline
targets::tar_load(p2_drb_comids_all_tribs)

## subsetting to the comids that have differing areas
comids_over_50_km2 <- p2_drb_comids_all_tribs %>%
  filter(PRMS_segid %in% over_50_km2$PRMS_segid)

head(comids_over_50_km2)
## # A tibble: 6 × 2
##   PRMS_segid comid  
##   <chr>      <chr>  
## 1 12_1       1748745
## 2 130_1      2614146
## 3 142_1      2617440
## 4 156_1      2739776
## 5 157_1      2739496
## 6 157_1      2739498
# Check whether incorrect comids originally had comids of area 0
targets::tar_make(p2_NLCD_LC_w_catchment_area)
## ✓ skip target p2_prms_nhdv2_xwalk
## ✓ skip target p1_nhdv2reaches_sf
## ✓ skip target p2_drb_comids_all_tribs
## ✓ skip target p1_NLCD_LC_data
## ✓ skip target p2_NLCD_LC_w_catchment_area
## ✓ skip pipeline
targets::tar_load(p2_NLCD_LC_w_catchment_area)
## Subset the source dataset for NLCD w/ area 
PRMS_areasqkm_0 <- p2_NLCD_LC_w_catchment_area %>%
  filter(AREASQKM == 0) %>% select(PRMS_segid, comid, AREASQKM)
PRMS_areasqkm_0
## # A tibble: 519 × 3
##    PRMS_segid comid   AREASQKM
##    <chr>      <chr>      <dbl>
##  1 119_1      2614050        0
##  2 121_1      2613448        0
##  3 121_1      2613516        0
##  4 121_1      2613552        0
##  5 121_1      2613598        0
##  6 121_1      2613616        0
##  7 1257_1     8077794        0
##  8 1257_1     8080122        0
##  9 126_1      2614082        0
## 10 1263_1     8077902        0
## # … with 509 more rows
# Print list of PRMS that originally had areasqkm of 0 and that got an total_area val of lengthkm^2 
PRMS_areasqkm_0 %>% 
       mutate(incorrect = case_when(PRMS_segid %in%
                                      comids_over_50_km2$PRMS_segid ~ "TRUE",
                                    TRUE ~ 'FALSE')) %>% 
  filter(incorrect == TRUE)
## # A tibble: 5 × 4
##   PRMS_segid comid   AREASQKM incorrect
##   <chr>      <chr>      <dbl> <chr>    
## 1 216_1      4150296        0 TRUE     
## 2 226_1      4151818        0 TRUE     
## 3 386_1      4496244        0 TRUE     
## 4 878_1      4782271        0 TRUE     
## 5 892_1      4782567        0 TRUE
## Grab comids with over 50 km2 difference using nhdplustools
all_comids <- get_nhdplus(comid = comids_over_50_km2$comid, realization = 'catchment') %>% 
  mutate(featureid = as.character(featureid))

## join with our comids_over_50km2 to have prms col
comids_over_50_km2 <- comids_over_50_km2 %>%
  left_join(all_comids, by= c('comid' = 'featureid'),
            keep = TRUE) %>% 
  ## make sf object
  sf::st_as_sf()

sample_n(comids_over_50_km2,10)
## Simple feature collection with 10 features and 9 fields
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -75.65283 ymin: 39.58057 xmax: -74.59042 ymax: 41.63942
## Geodetic CRS:  WGS 84
## # A tibble: 10 × 10
##    PRMS_segid comid   id       gridcode featureid sourcefc areasqkm shape_length
##  * <chr>      <chr>   <chr>       <int> <chr>     <chr>       <dbl>        <dbl>
##  1 257_1      4186351 catchme…  1687250 4186351   NHDFlow…  0.0395       0.0110 
##  2 2754_1     4651262 catchme…  1692127 4651262   NHDFlow…  0.446        0.0329 
##  3 386_1      4496234 catchme…  1688097 4496234   NHDFlow…  0.219        0.0210 
##  4 216_1      4150298 catchme…  1682629 4150298   NHDFlow…  0.00779      0.00440
##  5 2756_1     9480964 catchme…  1693676 9480964   NHDFlow…  0.178        0.0198 
##  6 219_1      4150236 catchme…  1682626 4150236   NHDFlow…  0.0727       0.0129 
##  7 2756_1     9482240 catchme…  1693651 9482240   NHDFlow…  0.00325      0.00260
##  8 386_1      4494962 catchme…  1689519 4494962   NHDFlow…  0.821        0.0525 
##  9 157_1      2739540 catchme…  1681707 2739540   NHDFlow…  6.42         0.137  
## 10 157_1      2739496 catchme…  1681572 2739496   NHDFlow…  3.76         0.0981 
## # … with 2 more variables: shape_area <dbl>, geometry <POLYGON [°]>
catchment_gpkg <- p1_catchments_edited_sf %>% filter(PRMS_segid %in% unique(comids_over_50_km2$PRMS_segid))
reach <- p1_reaches_sf %>% filter(subsegid %in% unique(comids_over_50_km2$PRMS_segid))

mapview(comids_over_50_km2, col.regions = 'red', alpha.regions = 0.7)+
  mapview(catchment_gpkg)+
  mapview(reach, color = 'black')
## Assess specific PRMS_segid - 12_1
prms_seg <- '12_1'
print(df[,1:3][df$PRMS_segid == prms_seg,])
## # A tibble: 1 × 3
## # Groups:   PRMS_segid [1]
##   PRMS_segid hru_segment PRMS_area_km2_FOR
##   <chr>      <chr>                   <dbl>
## 1 12_1       7                        67.1
subsetted_comids_over_50km2 <- comids_over_50_km2 %>% filter(PRMS_segid == prms_seg)
catchment <- p1_catchments_edited_sf %>% filter(PRMS_segid == unique(subsetted_comids_over_50km2$PRMS_segid))
reach <- p1_reaches_sf %>% filter(subsegid == unique(subsetted_comids_over_50km2$PRMS_segid))
                                  
mapview(subsetted_comids_over_50km2,
        col.regions = 'red', alpha.regions = 0.5) +
  mapview(catchment)+ mapview(reach, color = 'black')
## 157_1
prms_seg <- '157_1'
print(df[,1:3][df$PRMS_segid == prms_seg,])
## # A tibble: 1 × 3
## # Groups:   PRMS_segid [1]
##   PRMS_segid hru_segment PRMS_area_km2_FOR
##   <chr>      <chr>                   <dbl>
## 1 157_1      157                      103.
subsetted_comids_over_50km2 <- comids_over_50_km2 %>% filter(PRMS_segid == prms_seg)
catchment <- p1_catchments_edited_sf %>% filter(PRMS_segid == unique(subsetted_comids_over_50km2$PRMS_segid))
reach <- p1_reaches_sf %>% filter(subsegid == unique(subsetted_comids_over_50km2$PRMS_segid))

mapview(subsetted_comids_over_50km2,
        col.regions = 'red', alpha.regions = 0.5) +
  mapview(catchment)+ mapview(reach, color = 'black')
comids_over_50_km2
## Simple feature collection with 68 features and 9 fields (with 5 geometries empty)
## Geometry type: POLYGON
## Dimension:     XY
## Bounding box:  xmin: -75.94924 ymin: 39.44927 xmax: -74.59042 ymax: 42.11488
## Geodetic CRS:  WGS 84
## # A tibble: 68 × 10
##    PRMS_segid comid   id       gridcode featureid sourcefc areasqkm shape_length
##    <chr>      <chr>   <chr>       <int> <chr>     <chr>       <dbl>        <dbl>
##  1 12_1       1748745 catchme…  1681538 1748745   NHDFlow…   0.318       0.0312 
##  2 130_1      2614146 catchme…  1679683 2614146   NHDFlow…   0.0339      0.0100 
##  3 142_1      2617440 catchme…  1680190 2617440   NHDFlow…   0.0106      0.00454
##  4 156_1      2739776 catchme…  1681708 2739776   NHDFlow…   2.90        0.0771 
##  5 157_1      2739496 catchme…  1681572 2739496   NHDFlow…   3.76        0.0981 
##  6 157_1      2739498 catchme…  1681529 2739498   NHDFlow…   0.386       0.0306 
##  7 157_1      2739500 catchme…  1681553 2739500   NHDFlow…   2.48        0.0759 
##  8 157_1      2739540 catchme…  1681707 2739540   NHDFlow…   6.42        0.137  
##  9 167_1      2743198 catchme…  1681768 2743198   NHDFlow…   1.48        0.0584 
## 10 216_1      4150290 catchme…  1684365 4150290   NHDFlow…   0.0537      0.0104 
## # … with 58 more rows, and 2 more variables: shape_area <dbl>,
## #   geometry <POLYGON [°]>
## 216_1
prms_seg <- '216_1'
print(df[,1:3][df$PRMS_segid == prms_seg,])
## # A tibble: 1 × 3
## # Groups:   PRMS_segid [1]
##   PRMS_segid hru_segment PRMS_area_km2_FOR
##   <chr>      <chr>                   <dbl>
## 1 216_1      223                      78.0
subsetted_comids_over_50km2 <- comids_over_50_km2 %>% filter(PRMS_segid == prms_seg)
catchment <- p1_catchments_edited_sf %>% filter(PRMS_segid == unique(subsetted_comids_over_50km2$PRMS_segid))
reach <- p1_reaches_sf %>% filter(subsegid == unique(subsetted_comids_over_50km2$PRMS_segid))

mapview(subsetted_comids_over_50km2,
        col.regions = 'red', alpha.regions = 0.5) +
  mapview(catchment)+ mapview(reach, color = 'black')
## 878_1
prms_seg <- '878_1'
print(df[,1:3][df$PRMS_segid == prms_seg,])
## # A tibble: 1 × 3
## # Groups:   PRMS_segid [1]
##   PRMS_segid hru_segment PRMS_area_km2_FOR
##   <chr>      <chr>                   <dbl>
## 1 878_1      880                      66.1
subsetted_comids_over_50km2 <- comids_over_50_km2 %>% filter(PRMS_segid == prms_seg)
catchment <- p1_catchments_edited_sf %>% filter(PRMS_segid == unique(subsetted_comids_over_50km2$PRMS_segid))
reach <- p1_reaches_sf %>% filter(subsegid == unique(subsetted_comids_over_50km2$PRMS_segid))

mapview(subsetted_comids_over_50km2,
        col.regions = 'red', alpha.regions = 0.5) +
  mapview(catchment)+ mapview(reach, color = 'black')
## 386_1
prms_seg <- '386_1'
print(df[,1:3][df$PRMS_segid == prms_seg,])
## # A tibble: 1 × 3
## # Groups:   PRMS_segid [1]
##   PRMS_segid hru_segment PRMS_area_km2_FOR
##   <chr>      <chr>                   <dbl>
## 1 386_1      387                      86.7
subsetted_comids_over_50km2 <- comids_over_50_km2 %>% filter(PRMS_segid == prms_seg)
catchment <- p1_catchments_edited_sf %>% filter(PRMS_segid == unique(subsetted_comids_over_50km2$PRMS_segid))
reach <- p1_reaches_sf %>% filter(subsegid == unique(subsetted_comids_over_50km2$PRMS_segid))

mapview(subsetted_comids_over_50km2,
        col.regions = 'red', alpha.regions = 0.5) +
  mapview(catchment)+ mapview(reach, color = 'black')
comids_386_1 <- p2_drb_comids_all_tribs %>% filter(PRMS_segid =='386_1')
comids_387_1 <- p2_drb_comids_all_tribs %>% filter(PRMS_segid =='387_1')

## Polygon is the same 
PRMS_catchments_386_387 <- p1_catchments_edited_sf[p1_catchments_edited_sf$PRMS_segid %in% c('386_1','387_1'),]

mapview(get_nhdplus(comid = comids_386_1$comid, realization = 'catchment'))+
  mapview(get_nhdplus(comid = comids_387_1$comid, realization = 'catchment'), col.regions = 'red')+
  mapview(PRMS_catchments_386_387, col.regions = 'green')

Explore the geopackage

## catchment for prms >50 km2 diff overlay on all catchments 
mapview(p1_catchments_edited_sf %>% filter(PRMS_segid %in% over_50_km2$PRMS_segid), color = 'blue', col.regions = 'transparent', lwd = 2)+
  mapview(p1_catchments_edited_sf, col.regions = 'red', alpha.regions = 0.3)

Checking area diff between the upstream targets - p1_catchments_edited_sf vs p2_NLCD_LC_w_catchment_area

# Check areas of upstream targets used to generate the land use targets above. 

targets::tar_load(p1_catchments_edited_sf)
targets::tar_load(p2_NLCD_LC_w_catchment_area)

t1 <- p2_NLCD_LC_w_catchment_area %>% group_by(PRMS_segid) %>% 
  summarise(PRMS_area_km2 = sum(AREASQKM))

t2 <- p1_catchments_edited_sf %>% sf::st_drop_geometry() %>% 
  group_by(PRMS_segid) %>% summarise(PRMS_area_km2 = sum(hru_area_m2)/10^6)


df_2 <- t1 %>% left_join(t2, by = 'PRMS_segid') %>% 
  mutate(ab_diff = abs(PRMS_area_km2.x - PRMS_area_km2.y)) %>%
  mutate(ab_diff_categorized = factor(case_when(ab_diff > 50 ~ '>50 km2',
                                                ab_diff > 10 ~ '>10 km2',
                                                ab_diff > 5 ~ '>5 km2',
                                                ab_diff >1 ~ '>1 km2',
                                                TRUE ~ '< 1 km2'),
                                      levels = c('< 1 km2', '>1 km2', '>5 km2', '>10 km2', '>50 km2'))) 

ggplot(df_2, aes(x = ab_diff_categorized))+
  geom_bar()+
  scale_y_continuous(n.breaks = 15)+theme_bw()+
  labs(caption ='\n Ab difference between area (km2) of catchment polygons in p1_catchments_edited_sf and xwalk area from land use prop dataset\n (2000) and NLCD land use dataset (2001) grouped into 4 categories')

the same